home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Almathera Ten Pack 3: CDPD 3
/
Almathera Ten on Ten - Disc 3: CDPD3.iso
/
scope
/
051-075
/
scopedisk75
/
ispell
/
src
/
buildhash.c
< prev
next >
Wrap
C/C++ Source or Header
|
1995-03-19
|
6KB
|
327 lines
/* -*- Mode: Text -*- */
/*
* buildhash.c - make a hash table for ispell
*
* Pace Willisson, 1983
*/
#include <stdio.h>
#ifndef AMIGA /* i am not sure why this is here */
#include <sys/types.h>
#endif
#include <stat.h>
#include "ispell.h"
/* Input dictionary; stat'ed in main() and read by readdict() and newcount(). */
#define DICT "dict.191"
/* Holds the line count of DICT; written by newcount(), read back by main(). */
#define COUNT "count.191"
/* Receives the chain-length histogram that main() writes. */
#define STATS "stats.191"
/* Number of histogram buckets; chains of NSTAT - 1 or more entries share the last one. */
#define NSTAT 100
/* stat() results for DICT and COUNT; main() compares their st_mtime fields. */
struct stat dstat, cstat;
/* numwords: count read from COUNT.  hashsize: table size, set equal to numwords in main(). */
int numwords, hashsize;
/* Explicit declaration so calls are typed as returning char * (an undeclared function is assumed to return int). */
char *malloc();
/* The hash table; allocated by readdict() with numwords zeroed entries. */
struct dent *hashtbl;
/*
 * main - build the ispell hash file from the dictionary DICT.
 *
 * Steps, in order:
 *   1. check that DICT exists; call newcount() to (re)create COUNT when
 *      COUNT is missing or older than DICT;
 *   2. read the word count from COUNT and use it as the hash table size;
 *   3. load the dictionary with readdict();
 *   4. write a histogram of chain lengths to STATS: bucket 0 counts unused
 *      slots, bucket j counts slots heading a chain of j entries, and
 *      chains of NSTAT - 1 or more entries share the last bucket;
 *   5. call filltable() and then output().
 *
 * Exits with status 1 when a required file is missing or unusable and
 * returns 0 once output() has been called.
 */
main ()
{
    FILE *countf;
    FILE *statf;
    int stats[NSTAT];
    int i;

    if (stat (DICT, &dstat) < 0) {
        fprintf (stderr, "No dictionary (%s)\n", DICT);
        exit (1);
    }

    if (stat (COUNT, &cstat) < 0 || dstat.st_mtime > cstat.st_mtime)
        newcount ();

    if ((countf = fopen (COUNT, "r")) == NULL) {
        fprintf (stderr, "No count file\n");
        exit (1);
    }

    /* numwords stays 0 if fscanf cannot convert anything */
    numwords = 0;
    fscanf (countf, "%d", &numwords);
    fclose (countf);

    /* a negative count is as unusable as a missing one: it sizes the table */
    if (numwords <= 0) {
        fprintf (stderr, "Bad count file\n");
        exit (1);
    }

    hashsize = numwords;

    readdict ();

    if ((statf = fopen (STATS, "w")) == NULL) {
        fprintf (stderr, "Can't create %s\n", STATS);
        exit (1);
    }

    for (i = 0; i < NSTAT; i++)
        stats[i] = 0;

    for (i = 0; i < hashsize; i++) {
        struct dent *dp;
        int j;

        if (hashtbl[i].used == 0) {
            stats[0]++;
        } else {
            /* j ends up as the number of entries in this slot's chain */
            for (j = 1, dp = &hashtbl[i]; dp->next != NULL; j++, dp = dp->next)
                ;
            if (j >= NSTAT)
                j = NSTAT - 1;
            stats[j]++;
        }
    }

    for (i = 0; i < NSTAT; i++)
        fprintf (statf, "%d: %d\n", i, stats[i]);
    fclose (statf);

    filltable ();

    output ();

    /* give the caller a defined exit status instead of falling off the end */
    return (0);
}
/*
 * output - write the hash table to "ispell.hash".
 *
 * File layout, in write order: a struct hashheader, the NUL-terminated
 * word strings of all hashsize slots, then the hashsize struct dent
 * entries.  Before the entries are written their pointer fields are
 * overwritten in place so that they no longer hold addresses:
 *   word - byte offset of the word within the string area,
 *   next - index of the next entry in hashtbl, or -1 at the end of a chain.
 * The header is written twice: once as a placeholder and again, after a
 * rewind, when the total string size is known.
 *
 * hashtbl can no longer be followed as an in-memory table afterwards.
 * If the file cannot be created a message is printed and nothing is
 * written; a failed write or close is reported on stderr.
 */
output ()
{
    FILE *outfile;
    struct hashheader hashheader;
    int strptr, n, i;
    int failed;
    char *word;

    if ((outfile = fopen ("ispell.hash", "w")) == NULL) {
        fprintf (stderr, "can't create ispell.hash\n");
        return;
    }

    hashheader.magic = MAGIC;
    hashheader.stringsize = 0;      /* patched after the strings are written */
    hashheader.tblsize = hashsize;
    fwrite (&hashheader, sizeof hashheader, 1, outfile);

    strptr = 0;
    for (i = 0; i < hashsize; i++) {
        /*
         * A slot that was never filled still has the zeroed word pointer
         * left by the calloc() in readdict(); write an empty string for it
         * instead of passing a null pointer to strlen().
         */
        word = hashtbl[i].word;
        if (word == NULL)
            word = "";
        n = strlen (word) + 1;
        fwrite (word, n, 1, outfile);
        hashtbl[i].word = (char *)strptr;
        strptr += n;
    }

    /* turn chain pointers into table indices */
    for (i = 0; i < hashsize; i++) {
        if (hashtbl[i].next != 0) {
            int x;
            x = hashtbl[i].next - hashtbl;
            hashtbl[i].next = (struct dent *)x;
        } else {
            hashtbl[i].next = (struct dent *)-1;
        }
    }

    fwrite (hashtbl, sizeof (struct dent), hashsize, outfile);

    hashheader.stringsize = strptr;
    rewind (outfile);
    fwrite (&hashheader, sizeof hashheader, 1, outfile);

    /* a short write would otherwise leave a truncated file unnoticed */
    failed = ferror (outfile);
    if (fclose (outfile) == EOF)
        failed = 1;
    if (failed)
        fprintf (stderr, "error writing ispell.hash\n");
}
/*
 * filltable - move chained entries that live outside the table into it.
 *
 * For every used slot whose next pointer leads outside hashtbl, each entry
 * further down the chain is copied into the next unused table slot and the
 * links are redirected to the copies, so the chain ends up entirely inside
 * hashtbl.  Slots with no chain, or whose chain already points into the
 * table, are left alone.
 *
 * If no unused slot is left, "table overflow" is printed, a character is
 * read from stdin and work on that chain stops.
 * NOTE(review): after an overflow the rest of that chain is still linked
 * to entries outside the table.
 */
filltable ()
{
    struct dent *freepointer, *nextword, *dp;
    struct dent *tblend;
    int i;

    tblend = hashtbl + hashsize;    /* one past the last slot */

    /* first unused slot; stop at the end if the table is already full */
    for (freepointer = hashtbl;
         freepointer < tblend && freepointer->used;
         freepointer++)
        ;

    for (nextword = hashtbl, i = numwords; i != 0; nextword++, i--) {
        if (nextword->used == 0) {
            continue;
        }
        if (nextword->next == NULL) {
            continue;
        }
        /* chain already inside the table */
        if (nextword->next >= hashtbl && nextword->next < tblend) {
            continue;
        }

        dp = nextword;
        while (dp->next) {
            /* >= rather than >: tblend itself is not a slot */
            if (freepointer >= tblend) {
                fprintf (stderr, "table overflow\n");
                getchar ();
                break;
            }
            *freepointer = *(dp->next);
            dp->next = freepointer;
            dp = freepointer;

            /* the slot just filled is now used; advance to the next free one */
            while (freepointer < tblend && freepointer->used)
                freepointer++;
        }
    }
}
/*
 * readdict - load every line of DICT into the in-memory hash table.
 *
 * Allocates hashtbl as numwords zeroed entries, then for each line read:
 * strips the trailing newline, calls makedent() to cut off and decode the
 * flags, copies the word to the heap and stores the entry in slot
 * hash (word, length, hashsize).  If that slot is already in use, the
 * entry goes into a separately allocated node linked directly behind the
 * slot's head entry.
 *
 * Lines for which makedent() returns a negative value are skipped.  The
 * running line count is printed to stdout before line 0, 1000, 2000, ...
 * Failure to open the dictionary or to allocate memory prints a message
 * and exits with status 1.
 */
readdict ()
{
    struct dent d;
    char lbuf[100];
    FILE *dictf;
    int i;
    int h;
    int len;
    /* same treatment the file gives malloc: undeclared, it would be int */
    char *calloc ();

    if ((dictf = fopen (DICT, "r")) == NULL) {
        fprintf (stderr, "Can't open dictionary\n");
        exit (1);
    }

    hashtbl = (struct dent *) calloc (numwords, sizeof (struct dent));
    if (hashtbl == NULL) {
        fprintf (stderr, "couldn't allocate hash table\n");
        exit (1);
    }

    i = 0;
    while (fgets (lbuf, sizeof lbuf, dictf) != NULL) {
        if (i % 1000 == 0) {
            printf ("%d ", i);
            fflush (stdout);
        }
        i++;

        /* strip the newline; the length test keeps lbuf[-1] from being read */
        len = strlen (lbuf);
        if (len > 0 && lbuf[len - 1] == '\n')
            lbuf[len - 1] = 0;

        if (makedent (lbuf, &d) < 0)
            continue;

        /* makedent() has cut lbuf at the first '/', so measure again */
        len = strlen (lbuf);
        d.word = malloc (len + 1);
        if (d.word == NULL) {
            fprintf (stderr, "couldn't allocate space for word %s\n", lbuf);
            exit (1);
        }
        strcpy (d.word, lbuf);

        h = hash (lbuf, len, hashsize);
        if (hashtbl[h].used == 0) {
            hashtbl[h] = d;
        } else {
            struct dent *dp;

            dp = (struct dent *) malloc (sizeof (struct dent));
            if (dp == NULL) {
                fprintf (stderr, "couldn't allocate space for collision\n");
                exit (1);
            }
            /* insert right after the head of the chain */
            *dp = d;
            dp->next = hashtbl[h].next;
            hashtbl[h].next = dp;
        }
    }

    /* the dictionary is fully read; do not leave the stream open */
    fclose (dictf);
    printf ("\n");
}
/*
 * makedent - turn one dictionary line into a hash table entry.
 *
 * lbuf holds a line of the form "word" or "word/F/F...", each F being a
 * single flag letter.  The line is cut at the first '/', so on return lbuf
 * contains just the word.  In d, next is cleared, used is set and every
 * flag bit is cleared and then set again for each flag letter found.
 * d->word is not assigned here.
 *
 * Returns 0 on success, or -1 (after printing a message) when the word is
 * longer than WORDLEN - 1 characters.  Unknown flag letters and malformed
 * flag lists are reported on stderr but still return 0.
 */
makedent (lbuf, d)
char *lbuf;
struct dent *d;
{
    char *p, *index();

    d->next = NULL;
    d->used = 1;
    d->v_flag = 0;
    d->n_flag = 0;
    d->x_flag = 0;
    d->h_flag = 0;
    d->y_flag = 0;
    d->g_flag = 0;
    d->j_flag = 0;
    d->d_flag = 0;
    d->t_flag = 0;
    d->r_flag = 0;
    d->z_flag = 0;
    d->s_flag = 0;
    d->p_flag = 0;
    d->m_flag = 0;

    /* split "word/flags" into the word (left in lbuf) and the flags (at p + 1) */
    p = index (lbuf, '/');
    if (p != NULL)
        *p = 0;

    if (strlen (lbuf) > WORDLEN - 1) {
        /* the %s needs its argument: print the offending word */
        printf ("%s: word too big\n", lbuf);
        return (-1);
    }

    if (p == NULL)
        return (0);

    p++;
    /* p walks the flag list; compare characters with 0, not the pointer NULL */
    while (*p != 0) {
        switch (*p) {
        case 'V': d->v_flag = 1; break;
        case 'N': d->n_flag = 1; break;
        case 'X': d->x_flag = 1; break;
        case 'H': d->h_flag = 1; break;
        case 'Y': d->y_flag = 1; break;
        case 'G': d->g_flag = 1; break;
        case 'J': d->j_flag = 1; break;
        case 'D': d->d_flag = 1; break;
        case 'T': d->t_flag = 1; break;
        case 'R': d->r_flag = 1; break;
        case 'Z': d->z_flag = 1; break;
        case 'S': d->s_flag = 1; break;
        case 'P': d->p_flag = 1; break;
        case 'M': d->m_flag = 1; break;
        /* no case 0: the loop condition already excludes a NUL here */
        default:
            fprintf (stderr, "unknown flag %c word %s\n",
                     *p, lbuf);
            break;
        }
        p++;
        /* a flag letter must be followed by '/', a newline or the end */
        if (*p != '/' && *p != 0 && *p != '\n') {
            fprintf (stderr, "bad format %s (%c 0%o)\n",
                     lbuf, *p, *p);
            break;
        }
        if (*p)
            p++;
    }
    return (0);
}
/*
 * newcount - count the lines of DICT and store the total in COUNT.
 *
 * Announces itself on stderr, reads DICT through a 200-byte buffer
 * counting one per successful fgets(), and prints the running total to
 * stdout each time it reaches a multiple of 1000.  The final total is
 * printed to stdout and written to COUNT as a decimal number followed by
 * a newline.  Exits with status 1 if DICT cannot be opened or COUNT
 * cannot be created.
 */
newcount ()
{
    char line[200];
    FILE *fp;
    int total;

    fprintf (stderr, "Counting words in dictionary ...\n");

    fp = fopen (DICT, "r");
    if (fp == NULL) {
        fprintf (stderr, "Can't open dictionary\n");
        exit (1);
    }

    for (total = 0; fgets (line, sizeof line, fp) != NULL; ) {
        total++;
        if (total % 1000 != 0)
            continue;
        /* progress marker every thousand lines */
        printf ("%d ", total);
        fflush (stdout);
    }
    fclose (fp);

    printf ("\n%d words\n", total);

    fp = fopen (COUNT, "w");
    if (fp == NULL) {
        fprintf (stderr, "can't create %s\n", COUNT);
        exit (1);
    }
    fprintf (fp, "%d\n", total);
    fclose (fp);
}